# -*- coding: utf-8 -*-
"""
Created on Mon Mar 19 13:59:37 2018

@author: lizheng
"""

from pyspark import SparkConf, SparkContext
from pyspark.sql import SparkSession
from pyspark.sql import SQLContext
from pyspark.sql.types import *

# Build (or reuse) the SparkSession, then take its underlying SparkContext
# rather than creating/getting a second one independently — both names refer
# to the same driver-side context, but this makes the relationship explicit.
spark = SparkSession.builder.appName("lz").getOrCreate()
sc = spark.sparkContext


# Full HDFS path of the training-data CSV to read.
rdd = sc.textFile('hdfs://centos4:8020/user/hive/traindata_6512677_3657.csv')
print("fffff")
# count() is an action: it forces the file to actually be read and
# returns the number of lines (header included).
print(rdd.count())
print("aaaaa")
# NOTE(review): the original called `rdd.map()` here with no function
# argument, which raises TypeError at runtime; the call was also a
# discarded lazy transformation, so it has been removed.

import jieba
# Load the custom segmentation dictionary. jieba.load_userdict expects a
# local filesystem path (or an open file object), NOT a URL: the original
# 'file://root/lizheng/ciku.txt' would be treated as a literal relative
# path and fail to open. Use the plain absolute path instead.
# TODO confirm the dictionary actually lives at /root/lizheng/ciku.txt.
jieba.load_userdict('/root/lizheng/ciku.txt')

# Sample record format: 'Mary,F,7065'

# Re-read the CSV with 3 partitions and drop the header row.
lines = sc.textFile('hdfs://centos4:8020/user/hive/traindata_6512677_3657.csv', 3)
header = lines.first()  # first line of the file (the header)
lines = lines.filter(lambda row: row != header)  # keep only data rows
# The original computed the count but discarded the result; print it so the
# action is actually useful, consistent with the earlier debug prints.
print(lines.count())